rm(list = ls())

## Load libraries

library(ggplot2, warn.conflicts = FALSE)
library(data.table, warn.conflicts = FALSE)
library(plm, warn.conflicts = FALSE)
suppressPackageStartupMessages(library(tidyverse, warn.conflicts = FALSE))

## Load data

load("../generated_data/bes_w1_w21_pacer_w1_w2_merged.Rdata")

setkey(bes_long, id, wave)

########################
## Reshape data for analysis

## Outcome variables

outcome_vars <- c("redistSelf", "ptvCon", "ptvLab", "ptvLD", "taxSpendSelf" ,"trustWestminster" ,"deficitReduce", "riskTaking", "approveUKGovt", "govtHandleCostLive", "govtHandleEcon", "govtHandleImmig", "govtHandleNHS", "govtHandleEduc", "govtHandleLevelCrime", "govtCorpSupport", "ideoBHPSPprivateEnterprise", "ideoBHPSSstateOwnership", "ideoBHPSJjobForAll", "ideoNoNeedUnions", "ideoFairShareWealth", "ideoOneLawRich", "reasonForUnemployment", "immigrantsWelfareState", "govtHandouts", "benefitsNotDeserved", "riskPoverty", "riskUnemployment", "lockdownApproval")


bes_long <- tibble(bes_long)


out <- bes_long %>%
  filter((id_in_any_pacer == TRUE|id_in_bes_w20 == TRUE) &
           id_in_wave == TRUE) %>%
  mutate(income_guarantee_treatment = ifelse(is.na(income_guarantee_binary),FALSE, income_guarantee_binary),
         income_guarantee_covid_treatment = ifelse(is.na(income_guarantee_covid_binary),FALSE, income_guarantee_covid_binary),
         income_guarantee_covid_job_treatment = ifelse(is.na(income_guarantee_covid_job_binary),FALSE, income_guarantee_covid_job_binary),
         income_guarantee_covid_self_treatment = ifelse(is.na(income_guarantee_covid_self_binary),FALSE, income_guarantee_covid_self_binary),
         income_guarantee_other_treatment = ifelse(is.na(income_guarantee_other_binary),FALSE, income_guarantee_other_binary),
         workingStatus_imputed_ordinal = factor(workingStatus,
                                                levels = rev(c("Working full time (30 or more hours per week)", 
                                                               "Working part time (8-29 hours a week)", 
                                                               "Working part time (less than 8 hours a week)", 
                                                               "Unemployed and looking for work"))),
         workingStatus_imputed_ordinal_2 = factor(recode(workingStatus,
                                                         "Not in paid work for any other reason" = "Unemployed and looking for work"),
                                                  levels = rev(c("Working full time (30 or more hours per week)", 
                                                                 "Working part time (8-29 hours a week)", 
                                                                 "Working part time (less than 8 hours a week)", 
                                                                 "Unemployed and looking for work"))),
         vote19 = recode_factor(p_past_vote_2019,
                                Conservative = "Conservative",
                                Labour = "Labour",
                                .default = "Other"),
         income_guarantee_furlugh_ws = case_when(is.na(workingStatus) ~ NA,
                                                 workingStatus == "Furloughed" ~ TRUE,
                                                 workingStatus != "Furloughed" ~ FALSE),
         polAttention_numeric = as.numeric(factor(polAttention, levels = c("Pay no attention",1:9,"Pay a great deal of attention")))) %>%
  rename(cross_sectional_weight = cross_sectional_weight_) %>%
  group_by(id) %>%
  mutate(lost_employment = ifelse(wave < 19.5 | length(wave[wave<19.5]) == 0, FALSE, 
                                  as.numeric(workingStatus_imputed_ordinal)[wave == max(wave[wave < 19.5])] > 
                                    as.numeric(workingStatus_imputed_ordinal)),
         lost_employment = ifelse(is.na(lost_employment), FALSE, lost_employment),
         lost_employment_2 = ifelse(wave < 19.5 | length(wave[wave<19.5]) == 0, FALSE, 
                                    as.numeric(workingStatus_imputed_ordinal_2)[wave == max(wave[wave < 19.5])] > 
                                      as.numeric(workingStatus_imputed_ordinal_2)),
         lost_employment_2 = ifelse(is.na(lost_employment_2), FALSE, lost_employment_2),
         
         # Pre-crisis attitudes
         ## Mean prior attitudes
         redistSelf_prior = mean(as.numeric(redistSelf[wave <= 19]), na.rm = TRUE),
         taxSpendSelf_prior = mean(as.numeric(taxSpendSelf[wave <= 19]), na.rm = TRUE),
         
         ## Mean prior attitudes - BES14 onwards
         redistSelf_prior14 = mean(as.numeric(redistSelf[wave <= 19 & wave >= 14]), na.rm = TRUE),
         taxSpendSelf_prior14 = mean(as.numeric(taxSpendSelf[wave <= 19 & wave >= 14]), na.rm = TRUE),
         
         ## Recent prior attitudes
         redistSelf_prior_recent = ifelse(sum(wave<=19 & !is.na(redistSelf)) != 0,
                                          as.numeric(redistSelf[wave == max(wave[wave<=19 & !is.na(redistSelf)])]),
                                          NA),
         
         taxSpendSelf_prior_recent = ifelse(sum(wave<=19 & !is.na(taxSpendSelf)) != 0,
                                            as.numeric(taxSpendSelf[wave == max(wave[wave<=19 & !is.na(taxSpendSelf)])]),
                                            NA),
         
         ## Don't know attitudes
         redistSelf_dk_prior = any(redistSelf_dk[wave <= 19], na.rm = TRUE),
         taxSpendSelf_dk_prior = any(taxSpendSelf_dk[wave <= 19], na.rm = TRUE),
         
         ### Remove any observations for which we don't have any prior attitudes
         none_redistSelf_prior = all(is.na(redistSelf_dk[wave <= 19])),
         none_taxSpendSelf_prior = all(is.na(taxSpendSelf_dk[wave <= 19])),
         redistSelf_dk_prior = ifelse(none_redistSelf_prior, NA, redistSelf_dk_prior),
         taxSpendSelf_dk_prior = ifelse(none_taxSpendSelf_prior, NA, taxSpendSelf_dk_prior),
         
         ## Political attention
         polAttention_prior = mean(as.numeric(polAttention_numeric[wave <= 19]), na.rm = TRUE)
         
  ) %>%
  ungroup() 


# Specify prior attitude high and low groups

## Averaging across all pre-crisis waves

median_redistSelf_prior <- out %>% group_by(id) %>% summarise(x = unique(redistSelf_prior)) %>% pull(var = x) %>% median(na.rm = TRUE)
median_taxSpendSelf_prior <- out %>% group_by(id) %>% summarise(x = unique(taxSpendSelf_prior)) %>% pull(var = x) %>% median(na.rm = TRUE)
median_polAttention_prior <- out %>% group_by(id) %>% summarise(x = unique(polAttention_prior)) %>% pull(var = x) %>% median(na.rm = TRUE)

## Averaging across all pre-crisis waves from BES14 onwards
median_redistSelf_prior14 <- out %>% group_by(id) %>% summarise(x = unique(redistSelf_prior14)) %>% pull(var = x) %>% median(na.rm = TRUE)
median_taxSpendSelf_prior14 <- out %>% group_by(id) %>% summarise(x = unique(taxSpendSelf_prior14)) %>% pull(var = x) %>% median(na.rm = TRUE)

## Most recently expressed prior attitude
median_redistSelf_prior_recent <- out %>% group_by(id) %>% summarise(x = unique(redistSelf_prior_recent)) %>% pull(var = x) %>% median(na.rm = TRUE)
median_taxSpendSelf_prior_recent <- out %>% group_by(id) %>% summarise(x = unique(taxSpendSelf_prior_recent)) %>% pull(var = x) %>% median(na.rm = TRUE)

out <- out %>%
  mutate(polAttention_prior_group = factor(ifelse(polAttention_prior >= median_polAttention_prior, "High", "Low"), levels = c("Low", "High")),
         redistSelf_prior_group = case_when(redistSelf_prior <= 5 ~ "Right",
                                            redistSelf_prior >=5 & redistSelf_prior <=7  ~ "Mid",
                                            redistSelf_prior >= 7 ~ "Left"),
         redistSelf_prior_group14 = case_when(redistSelf_prior14 <= 5 ~ "Right",
                                              redistSelf_prior14 >=5 & redistSelf_prior14 <=7  ~ "Mid",
                                              redistSelf_prior14 >= 7 ~ "Left"),
         redistSelf_prior_group_recent = case_when(redistSelf_prior_recent <= 5 ~ "Right",
                                                   redistSelf_prior_recent >=5 & redistSelf_prior_recent <=7  ~ "Mid",
                                                   redistSelf_prior_recent >= 7 ~ "Left"),
         
         taxSpendSelf_prior_group = case_when(taxSpendSelf_prior <= 5 ~ "Right",
                                              taxSpendSelf_prior >=5 & taxSpendSelf_prior <=7  ~ "Mid",
                                              taxSpendSelf_prior >= 7 ~ "Left"),
         
         taxSpendSelf_prior_group14 = case_when(taxSpendSelf_prior14 <= 5 ~ "Right",
                                                taxSpendSelf_prior14 >=5 & taxSpendSelf_prior14 <=7  ~ "Mid",
                                                taxSpendSelf_prior14 >= 7 ~ "Left"),
         
         taxSpendSelf_prior_group_recent = case_when(taxSpendSelf_prior_recent <= 5 ~ "Right",
                                                     taxSpendSelf_prior_recent >=5 & taxSpendSelf_prior_recent <=7  ~ "Mid",
                                                     taxSpendSelf_prior_recent >= 7 ~ "Left")
  ) 

############################
## Outputs for draft


## Function for implementing plm on arbitrary outcomes

plm_1 <- function(y, my_formula = NULL){
  #cat(paste0(y," \n"))
  
  out$Y <- scale(as.numeric(out[[y]]))
  
  plm(as.formula(my_formula),
      effect = "twoway",
      model = "within",
      index = c("id","wave"),
      data = out,
      weights = out$cross_sectional_weight) 
}

## Estimate FE models with W21 data, using income_guarantee_furlugh_ws as treatment variable

outcome_vars <- c("redistSelf", "taxSpendSelf", "ideoBHPSJjobForAll")

baseline_formula <- "Y ~ lost_employment + income_guarantee_covid_job_treatment"

baseline_out_income_guarantee <- lapply(outcome_vars, function(x) plm_1(y = x, my_formula = baseline_formula)) 

names(baseline_out_income_guarantee) <- outcome_vars


baseline_formula_w21 <- "Y ~ lost_employment + income_guarantee_furlugh_ws"

baseline_out_income_guarantee_w21 <- lapply(outcome_vars, function(x) plm_1(y = x, my_formula = baseline_formula_w21)) 

names(baseline_out_income_guarantee_w21) <- outcome_vars


get_marginal_effects <- function(x = NULL){
  
  fx_mean <- coef(x)
  fx_se <- sqrt(diag(vcov(x)))
  fx_hi <- fx_mean + 1.96 * fx_se
  fx_lo <- fx_mean - 1.96 * fx_se
  
  est_out <- data.frame(term = names(coef(x)),
                        estimate = fx_mean,
                        se = fx_se,
                        conf.high = fx_hi,
                        conf.low = fx_lo,
                        row.names = NULL)
  
  est_out <- tibble(est_out)
  
  return(est_out)
  
}


baseline_out_income_guarantee_coefs <- baseline_out_income_guarantee %>% 
  map(~ get_marginal_effects(.x)) %>% 
  bind_rows(.id = "var") %>%
  mutate(term = recode(term, lost_employmentTRUE = "Lost Employment",
                       income_guarantee_covid_job_treatmentTRUE = "Furlough"))

baseline_out_income_guarantee_coefs_w21 <- baseline_out_income_guarantee_w21 %>% 
  map(~ get_marginal_effects(.x)) %>% 
  bind_rows(.id = "var") %>%
  mutate(term = recode(term, lost_employmentTRUE = "Lost Employment",
                       income_guarantee_furlugh_wsTRUE = "Furlough"))


baseline_out_income_guarantee_coefs$wave <- FALSE
baseline_out_income_guarantee_coefs_w21$wave <- TRUE

bind_rows(baseline_out_income_guarantee_coefs, baseline_out_income_guarantee_coefs_w21) %>%
  filter(term == "Furlough" & var %in% c("taxSpendSelf", "redistSelf")) %>%
  mutate(var = factor(var, levels = outcome_vars),
         var = recode_factor(var, `ideoBHPSJjobForAll`= "jobsForAll")) %>%
  ggplot(aes(x = estimate, xmin = conf.low, xmax = conf.high, y = var, group = wave, col = as.factor(wave))) + geom_point(position = position_dodge(width = .25)) + 
  geom_errorbarh(height = 0.001,position = position_dodge(width = .25)) + 
  geom_vline(xintercept = 0, linetype = 2) + 
  scale_color_manual("Include BES wave 21", values = c("black", "red")) + 
  theme_bw() +
  theme(axis.text = element_text(size = 12),
        text = element_text(size = 12),
        legend.position = "bottom") + 
  ylab("") + 
  xlab("Treatment effect") +
  xlim(c(-1,1)) + 
  facet_wrap(~term)
ggsave("../generated_images/fe_baseline_ideo_vars_w21_comparison.tiff", height = 4, width = 10, dpi = 800)
ggsave("../generated_images/fe_baseline_ideo_vars_w21_comparison.pdf", height = 4, width = 10)

## Parallel trends plots including w21

wave_by_wave1 <- out %>%
  filter(wave > 14 & !is.na(taxSpendSelf)) %>%
  group_by(id) %>%
  mutate(income_guarantee_treatment_all_waves = any(income_guarantee_furlugh_ws),
         lost_employment_all_waves = any(lost_employment)) %>%
  ungroup() %>%
  nest(data = -wave) %>%
  mutate(fit_1 = map(data, ~ broom::tidy(lm(scale(as.numeric(taxSpendSelf)) ~ -1 + income_guarantee_treatment_all_waves, weights = cross_sectional_weight, data= .x), conf.int = TRUE)))

wave_by_wave2 <- out %>%
  filter(wave > 14 ) %>%
  group_by(id) %>%
  mutate(income_guarantee_treatment_all_waves = any(income_guarantee_furlugh_ws),
         lost_employment_all_waves = any(lost_employment)) %>%
  ungroup() %>%
  nest(data = -wave) %>%
  mutate(fit_2 = map(data, ~ broom::tidy(lm(scale(as.numeric(redistSelf)) ~ -1 + income_guarantee_treatment_all_waves, weights = cross_sectional_weight, data= .x), conf.int = TRUE)))


a <- wave_by_wave1 %>%
  select(wave, fit_1) %>%
  unnest(fit_1) %>%
  mutate(treat_var = "Furlough",
         outcome = "taxSpendSelf") 

b <- wave_by_wave2 %>%
  select(wave, fit_2) %>%
  unnest(fit_2) %>%
  mutate(treat_var = "Furlough",
         outcome = "redistSelf") 

combined <- bind_rows(a,b) %>%
  mutate(term = ifelse(grepl("TRUE", term), TRUE, FALSE))

combined %>%
  ggplot(aes(x = wave, y = estimate, ymin = conf.low, ymax = conf.high, col = term)) + geom_pointrange() + geom_line() + 
  facet_grid(treat_var~outcome) + 
  ylim(c(-.7,.7)) + 
  theme_bw() + 
  xlab("Wave") + 
  scale_x_continuous(breaks = c(1:19,19.5,20,20.5,21), labels = c(paste0("BES",1:19),"PACER W1", "BES20", "PACER W2", "BES21")) + 
  theme(panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + 
  geom_vline(xintercept = 19.25, linetype = 2) + 
  scale_color_manual("",values = c("black", "red"), labels = c("Not furloughed", "Furloughed")) + 
  ylab("Mean response") + 
  theme(legend.position = "bottom")

ggsave("../generated_images/parallel_trends_redistSelf_taxSpendSelf_w21_comparison.pdf", width = 9, height = 4)


# Raw outcome by wave

wave_dates <- as.Date(c("2014-02-20",
                        "2014-05-22",
                        "2014-09-19",
                        "2015-03-04",
                        "2015-03-31",
                        "2015-05-08",
                        "2016-04-14",
                        "2016-05-06",
                        "2016-06-24",
                        "2016-11-24",
                        "2017-04-24",
                        "2017-05-05",
                        "2017-06-09",
                        "2018-05-04",
                        "2019-03-11",
                        "2019-05-24",
                        "2019-11-01",
                        "2019-11-13",
                        "2019-12-13",
                        "2020-04-18",
                        "2020-06-03",
                        "2020-09-15",
                        "2021-05-08")) 

wave_dates <- data.frame(wave = c(1:19,19.5, 20, 20.5, 21), wave_dates)

bes_long <- merge(bes_long, wave_dates, by = "wave")
out <- merge(out, wave_dates, by = "wave")


plot_by_wave_interaction <- function(var = "taxSpendSelf", interaction ="", title = "Support for redistribution by wave"){
  
  title <- gsub("ideoBHPSJjobForAll", "jobsForAll", title)
  
  for_plot <- out %>%
    mutate(outcome = as.numeric(get(var)),
           interaction = get(interaction)) %>%
    filter(!is.na(cross_sectional_weight) & !is.na(outcome)) %>%
    group_by(wave_dates) %>%
    mutate(n = sum(!is.na(outcome))) %>%
    filter(n != 1) %>%
    ungroup() %>%
    filter(!is.na(interaction)) %>%
    nest(data = -interaction) %>%
    mutate(mod = map(data, ~ estimatr::lm_robust(.$outcome ~ as.factor(.$wave_dates) - 1, weights = .$cross_sectional_weight)),
           tidied = map(mod, DeclareDesign::tidy)) %>%
    unnest(tidied) %>%
    mutate(date = as.Date(substring(term,24, 1000))) %>% 
    filter(statistic != Inf) 
  
  for_plot %>%
    ggplot(aes(x = date, y = estimate, ymin = conf.low, ymax = conf.high, col = interaction)) + 
    geom_linerange() + 
    geom_line()  + 
    geom_point() + 
    theme_bw() + 
    theme(legend.position = "bottom",panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + 
    scale_x_continuous(breaks = as.Date(paste0(2014:2021,"-01-01")), labels = 2014:2021, limits = range(c(out$wave_dates, as.Date("2021-01-01")))) + 
    geom_vline(xintercept = as.Date("2020-03-16"), lty = 2)+
    ggtitle(title) + xlab("Wave") + ylab("") +
    scale_color_manual("", values = c("orange", "darkgreen", "brown")[1:length(unique(for_plot$interaction))])
  
}


plot_by_wave_interaction("taxSpendSelf", "taxSpendSelf_prior_group", "taxSpendSelf by prior attitudes")
ggsave(paste0("../generated_images/taxSpendSelf_by_prior_by_wave_21.pdf"), width = 6, height = 4)

plot_by_wave_interaction("taxSpendSelf", "vote19", "taxSpendSelf by past vote")
ggsave(paste0("../generated_images/taxSpendSelf_by_past_vote_by_wave_21.pdf"), width = 6, height = 4)

plot_by_wave_interaction("redistSelf", "redistSelf_prior_group", "redistSelf by prior attitudes")
ggsave(paste0("../generated_images/redistSelf_by_prior_by_wave_21.pdf"), width = 6, height = 4)

plot_by_wave_interaction("redistSelf", "vote19", "redistSelf by past vote")
ggsave(paste0("../generated_images/redistSelf_by_past_vote_by_wave_21.pdf"), width = 6, height = 4)

